Force reinstall of operator resources on release version upgrade#1746
Conversation
| func (r *OpenStackReconciler) deleteAllOwnedResources(ctx context.Context, instance *operatorv1beta1.OpenStack) error { | ||
| Log := r.GetLogger(ctx) | ||
| Log.Info("Deleting all owned resources for release version upgrade") | ||
|
|
||
| // Delete all owned deployments | ||
| deployments := &appsv1.DeploymentList{} | ||
| err := r.List(ctx, deployments, &client.ListOptions{Namespace: instance.Namespace}) | ||
| if err != nil { | ||
| return errors.Wrap(err, "failed to list deployments") | ||
| } | ||
| for _, deployment := range deployments.Items { | ||
| if metav1.IsControlledBy(&deployment, instance) { | ||
| Log.Info("Deleting deployment", "name", deployment.Name) | ||
| err := r.Delete(ctx, &deployment) | ||
| if err != nil && !apierrors.IsNotFound(err) { | ||
| return errors.Wrapf(err, "failed to delete deployment %s", deployment.Name) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Delete all owned service accounts | ||
| serviceAccounts := &corev1.ServiceAccountList{} | ||
| err = r.List(ctx, serviceAccounts, &client.ListOptions{Namespace: instance.Namespace}) | ||
| if err != nil { | ||
| return errors.Wrap(err, "failed to list service accounts") | ||
| } | ||
| for _, sa := range serviceAccounts.Items { | ||
| if metav1.IsControlledBy(&sa, instance) { | ||
| Log.Info("Deleting service account", "name", sa.Name) | ||
| err := r.Delete(ctx, &sa) | ||
| if err != nil && !apierrors.IsNotFound(err) { | ||
| return errors.Wrapf(err, "failed to delete service account %s", sa.Name) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Delete all owned services | ||
| services := &corev1.ServiceList{} | ||
| err = r.List(ctx, services, &client.ListOptions{Namespace: instance.Namespace}) | ||
| if err != nil { | ||
| return errors.Wrap(err, "failed to list services") | ||
| } | ||
| for _, svc := range services.Items { | ||
| if metav1.IsControlledBy(&svc, instance) { | ||
| Log.Info("Deleting service", "name", svc.Name) | ||
| err := r.Delete(ctx, &svc) | ||
| if err != nil && !apierrors.IsNotFound(err) { | ||
| return errors.Wrapf(err, "failed to delete service %s", svc.Name) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Delete webhooks (these are cluster-scoped and not owned, but managed by label) | ||
| valWebhooks, err := r.Kclient.AdmissionregistrationV1().ValidatingWebhookConfigurations().List(ctx, metav1.ListOptions{ | ||
| LabelSelector: "openstack.openstack.org/managed=true", | ||
| }) | ||
| if err != nil { | ||
| return errors.Wrap(err, "failed listing validating webhook configurations") | ||
| } | ||
| for _, webhook := range valWebhooks.Items { | ||
| Log.Info("Deleting validating webhook", "name", webhook.Name) | ||
| err := r.Kclient.AdmissionregistrationV1().ValidatingWebhookConfigurations().Delete(ctx, webhook.Name, metav1.DeleteOptions{}) | ||
| if err != nil && !apierrors.IsNotFound(err) { | ||
| return errors.Wrapf(err, "failed to delete validating webhook %s", webhook.Name) | ||
| } | ||
| } | ||
|
|
||
| mutWebhooks, err := r.Kclient.AdmissionregistrationV1().MutatingWebhookConfigurations().List(ctx, metav1.ListOptions{ | ||
| LabelSelector: "openstack.openstack.org/managed=true", | ||
| }) | ||
| if err != nil { | ||
| return errors.Wrap(err, "failed listing mutating webhook configurations") | ||
| } | ||
| for _, webhook := range mutWebhooks.Items { | ||
| Log.Info("Deleting mutating webhook", "name", webhook.Name) | ||
| err := r.Kclient.AdmissionregistrationV1().MutatingWebhookConfigurations().Delete(ctx, webhook.Name, metav1.DeleteOptions{}) | ||
| if err != nil && !apierrors.IsNotFound(err) { | ||
| return errors.Wrapf(err, "failed to delete mutating webhook %s", webhook.Name) | ||
| } | ||
| } | ||
|
|
||
| Log.Info("All owned resources deleted successfully") | ||
| return nil | ||
| } |
There was a problem hiding this comment.
Be really nice if this could be done with some Generics to avoid the repetition. Does something like this work?
bshephar@80a5be2
(I don't have a OCP / OKD env to validate. So just throwing out a suggestion for you.)
5080ea0 to
73c5855
Compare
bshephar
left a comment
There was a problem hiding this comment.
Nice, looks good.
It could probably be tightened up to avoid the runtime assertions by using client.ObjectList instead of any for the list arg. I guess all of those types should match client.ObjectList. But that could just be a nice follow up patch imo. Something like:
-func deleteOwnedResources[L any, T any](
+func deleteOwnedResources[L client.ObjectList, T any](
ctx context.Context,
r *OpenStackReconciler,
instance client.Object,
@@ -359,7 +359,7 @@ func deleteOwnedResources[L any, T any](
) error {
log := r.GetLogger(ctx)
- err := r.List(ctx, any(list).(client.ObjectList), &client.ListOptions{Namespace: instance.GetNamespace()})
+ err := r.List(ctx, list, &client.ListOptions{Namespace: instance.GetNamespace()})
if err != nil {
return errors.Wrap(err, "failed to list resources")
}
I think that should still work.
Happy New Year btw.
|
@bshephar: changing LGTM is restricted to collaborators DetailsIn response to this:
Instructions for interacting with me using PR comments are available here. If you have questions or suggestions related to my behavior, please file an issue against the kubernetes-sigs/prow repository. |
73c5855 to
5468459
Compare
|
/test precommit-check |
|
could you add a comment to let golangci-lint to ignore that package name |
5468459 to
e9798cf
Compare
|
New changes are detected. LGTM label has been removed. |
done |
|
/retest |
| ) error { | ||
| log := r.GetLogger(ctx) | ||
|
|
||
| err := r.List(ctx, any(list).(client.ObjectList), &client.ListOptions{Namespace: instance.GetNamespace()}) |
There was a problem hiding this comment.
I don't think the casting is necessary now that you have made L a client.ObjectList. I think it should work like this now:
| err := r.List(ctx, any(list).(client.ObjectList), &client.ListOptions{Namespace: instance.GetNamespace()}) | |
| err := r.List(ctx, list, &client.ListOptions{Namespace: instance.GetNamespace()}) |
|
@bshephar: changing LGTM is restricted to collaborators DetailsIn response to this:
Instructions for interacting with me using PR comments are available here. If you have questions or suggestions related to my behavior, please file an issue against the kubernetes-sigs/prow repository. |
|
[APPROVALNOTIFIER] This PR is APPROVED This pull-request has been approved by: bshephar, dprince, stuggi The full list of commands accepted by this bot can be found here. The pull request process is described here DetailsNeeds approval from an approver in each of these files:
Approvers can indicate their approval by writing |
Fixes upgrade failures from 0.4 to main caused by incompatible webhook configuration changes that trigger index out of range panics during manifest merging. When OPENSTACK_RELEASE_VERSION is bumped, the controller now: - Detects the version change by comparing against status.ReleaseVersion - Deletes all owned resources (deployments, services, serviceaccounts, configmaps) - Removes managed webhooks (validating and mutating configurations) - Requeues to recreate resources with new manifests This one-time cleanup ensures a clean slate for incompatible upgrades where the structure of resources (especially webhooks) has changed between versions. Adds ReleaseVersion field to OpenStackStatus to track the deployed version. Also, adds bounds checking to skip copying clientConfig for new webhooks that don't exist in the current configuration, allowing the merge to complete successfully. Jira: OSPRH-23865 Co-authored-by: Brendan Shephard <bshephar@fedora-g16.bne-home.net>
e9798cf to
2306cc6
Compare
|
Build failed (check pipeline). Post https://softwarefactory-project.io/zuul/t/rdoproject.org/buildset/ca7a76cdab8e43d2a4e0c7590a9eef08 ✔️ openstack-k8s-operators-content-provider SUCCESS in 2h 29m 07s |
|
recheck |
|
Build failed (check pipeline). Post https://softwarefactory-project.io/zuul/t/rdoproject.org/buildset/33f5fa03d0a7401e9beefb4d161fa16f ✔️ openstack-k8s-operators-content-provider SUCCESS in 1h 53m 02s |
|
recheck |
|
Build failed (check pipeline). Post https://softwarefactory-project.io/zuul/t/rdoproject.org/buildset/f326743057c348008530c19a999888b0 ✔️ openstack-k8s-operators-content-provider SUCCESS in 2h 21m 39s |
|
recheck |
|
Build failed (check pipeline). Post https://softwarefactory-project.io/zuul/t/rdoproject.org/buildset/2055c372045043c785f59c9ab418df4a ✔️ openstack-k8s-operators-content-provider SUCCESS in 3h 22m 54s |
|
recheck |
38e6308
into
openstack-k8s-operators:main
Fixes upgrade failures from 0.4 to main caused by incompatible webhook configuration changes that trigger index out of range panics during manifest merging.
When OPENSTACK_RELEASE_VERSION is bumped, the controller now:
This one-time cleanup ensures a clean slate for incompatible upgrades where the structure of resources (especially webhooks) has changed between versions.
Adds ReleaseVersion field to OpenStackStatus to track the deployed version.
Jira: OSPRH-23865